# Load the `instacart` data set (the package that provides it must already be
# attached) and make sure it is stored as a tibble.
data("instacart")
# Pass the data exactly once: piping `instacart` into `as_tibble(instacart)`
# supplied it twice, with the second copy bound to an unrelated parameter.
instacart <- as_tibble(instacart)
# Build the analysis table: standardise the column names, derive a labelled
# day-of-week column, keep only the columns used below, and drop every row
# that still contains a missing value.
instacart_tidy <- instacart %>%
  janitor::clean_names() %>%
  mutate(
    # `order_dow` is shifted by one before conversion (presumably from a 0-6
    # coding to the 1-7 coding that `wday()` labels -- confirm against the
    # data dictionary).
    order_day_of_week = lubridate::wday(order_dow + 1, label = TRUE)
  ) %>%
  select(
    order_id, user_id, reordered,
    order_day_of_week, order_hour_of_day, days_since_prior_order,
    product_name, aisle, department
  ) %>%
  drop_na()
First is a line plot that shows the distribution of order times over the course of a day, with one line per day of the week.
# Count the distinct orders placed in each hour of the day, separately for each
# day of the week, and draw one line per weekday.
instacart_tidy %>%
  group_by(order_day_of_week, order_hour_of_day) %>%
  # Drop the grouping explicitly: by default the result stays grouped by
  # `order_day_of_week` and `summarise()` prints a message about it.
  summarize(count = n_distinct(order_id), .groups = "drop") %>%
  plot_ly(
    x = ~order_hour_of_day, y = ~count,
    type = "scatter", mode = "lines",
    color = ~order_day_of_week, alpha = 0.8
  ) %>%
  layout(
    title = "Distributions of Order Time During a Day",
    xaxis = list(title = "Time"),
    yaxis = list(title = "Number of Orders")
  )
Second is a bar chart that shows the 10 aisles with the greatest number of items ordered.
# Number of items ordered per aisle, restricted to the ten aisles with the
# largest counts (ties included), sorted from most to least ordered.
aisle_10 <- instacart_tidy %>%
  group_by(aisle) %>%
  summarise(count = n()) %>%
  mutate(aisle = str_to_title(aisle)) %>%
  # Name the ranking column explicitly. The superseded `top_n(10)` silently
  # ranked by whichever column happened to be last in the table.
  slice_max(count, n = 10) %>%
  arrange(desc(count))
# Bar chart of the item counts in `aisle_10`, with bars ordered from the
# most-ordered aisle to the least-ordered one.
aisle_10 %>%
  mutate(aisle = fct_reorder(aisle, -count)) %>%
  plot_ly(
    x = ~aisle, y = ~count, color = ~aisle, type = "bar",
    # Ten aisles need ten colours. The default qualitative palette (Set2) has
    # at most 8 and triggers RColorBrewer warnings, so use a 12-class palette.
    colors = "Paired"
  ) %>%
  layout(
    title = "Number of Items Ordered in Top 10 Popular Aisles",
    xaxis = list(title = "Aisle"),
    yaxis = list(title = "Number of items ordered")
  )
Third is a boxplot that shows the distributions of order intervals in the 10 most popular aisles.
# Names of the aisles kept in `aisle_10`.
aisle_name_10 <- aisle_10 %>%
  pull(aisle)

# Box plot of `days_since_prior_order` for the items in those aisles.
instacart_tidy %>%
  # Title-case the aisle names first so that they match `aisle_name_10`.
  mutate(aisle = str_to_title(aisle)) %>%
  filter(aisle %in% aisle_name_10) %>%
  # Order the boxes by `fct_reorder()`'s default summary (the median) of the
  # order interval within each aisle.
  mutate(aisle = fct_reorder(aisle, days_since_prior_order)) %>%
  plot_ly(
    x = ~aisle, y = ~days_since_prior_order, type = "box",
    color = ~aisle, alpha = 0.5,
    # Ten aisles need ten colours. The default qualitative palette (Set2) has
    # at most 8 and triggers RColorBrewer warnings, so use a 12-class palette.
    colors = "Paired"
  ) %>%
  layout(
    title = "Distributions of Order Interval in the Top 10 Popular Aisles",
    xaxis = list(title = "Aisle"),
    yaxis = list(title = "Days Since Prior Order")
  )